# This R script contains various functions used in the simulation and data analysis.
# I compare with: Zhou, Feng, and Du 2017 (ZFD), and Yang, Narisetty, and He 2018 (YNH).

# The most important functions are:

  # ZFDfit: implements Zhou, Feng, and Du (2017) interval-censored quantile regression.
  # censor: generates interval-censored data, assuming a certain mechanism for the visit times.
  # sim1, sim2, ...: simulate interval-censored data under different scenarios (only sim1 implemented).
  # simulator + auxiliary functions "print" and "plot": run the simulation, summarize the results.


#########################################################################
#########################################################################
#########################################################################

ZFDfit <- function(formula, data, tau){

  LL <- function(beta, x,tL,tR, tau){
    eta <- x%*%cbind(beta)
    out <- (1 - tau)*abs(tR - pmax(tR, eta)) + tau*abs(tL - pmin(tL, eta))
    sum(out)
  }

  cl <- match.call()
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data"), names(mf), 0L)
  mf <- mf[c(1L, m)]
  mf$drop.unused.levels <- TRUE
  mf[[1L]] <- quote(stats::model.frame)
  mf <- eval(mf, parent.frame())
  mt <- attr(mf, "terms")
  Y <- model.response(mf); tL <- Y[,1]; tR <- Y[,2]
  x <- model.matrix(mt,mf)

  stats <- c(
     left = mean(tL == -Inf),
     right = mean(tR == Inf),
     interval = mean(tL != -Inf & tR != Inf & tL != tR),
     observed = mean(tL == tR)
  )

  tL <- pmax(tL, min(tL[is.finite(tL)]))
  tR <- pmin(tR, max(tR[is.finite(tR)]))

  t <- (tL + tR)/2
  beta0 <- lm.fit(x,t)$coef
  beta <- NULL
  for(i in tau){
    o <- optim(beta0, LL, x = x, tL = tL, tR = tR, tau = i)
    beta <- cbind(beta, o$par)
  }
  colnames(beta) <- paste("tau =", tau)
  rownames(beta) <- colnames(x)

  out <- list(beta = beta, stats = stats, mf = mf, tau = tau)
  class(out) <- "ZFD"
  out
}

print.ZFD <- function(x){
  print(round(x$beta,3))
  cat("\n")
  cat("stats: \n")
  print(round(x$stats,3))
}

#########################################################################
#########################################################################
#########################################################################

# From time 0, there are "regular" visits: the distance between two visits
  # is a random Exp(lambda) variable. The visits continue until the event
  # has not been reached, implying that all individuals will be
  # interval-censored (no right or left censoring).

censor <- function(t, lambda = 1){

  n <- length(t)
  tL <- tR <- NULL
  for(i in 1:n){
    done <- FALSE
    vL <- 0
    while(!done){
      vR <- vL + rexp(1,lambda)
      if(t[i] <= vR){done <- TRUE; tL[i] <- vL; tR[i] <- vR}
      vL <- vR
    }
  }

  data.frame(tL = tL, tR = tR, t = t)
}

#########################################################################
#########################################################################
#########################################################################

sim1 <- function(n,lambda){
  x1 <- runif(n)
  x2 <- rexp(n)
  x3 <- rbinom(n, 1, 0.5)

  u <- runif(n)
  t <- (-1*log(1 - u)) + (3*u)*x1 + (3*u^2)*x2 + (3*sqrt(u))*x3
  Y <- censor(t,lambda)
  beta <- list(
   beta0 = function(tau){-1*log(1 - tau)},
   beta1 = function(tau){3*tau},
   beta2 = function(tau){3*tau^2},
   beta3 = function(tau){3*sqrt(tau)}
  )
  list(
    data = data.frame(t = Y$t, tL = Y$tL, tR = Y$tR, x1 = x1, x2 = x2, x3 = x3), 
    formula = formula(Surv(tL,tR, type = "interval2") ~ x1 + x2 + x3), beta = beta
  )
}

#########################################################################
#########################################################################
#########################################################################

# NOTE: as in YNH's examples on breast cancer, I use y = (tL + tR)/2 in DArq(y, x, tL, tR, ...)

simulator <- function(sim, n = 1000, B = 1000, tau = c(0.2,0.4,0.6,0.8), print.each = 50, ...){

  test <- sim(n = 10, ...)
  q <- length(test$beta)
  r <- length(tau)
  true <- lapply(test$beta, function(b) b(tau))

  beta <- matrix(NA, B, r); colnames(beta) <- paste("tau =", tau)
  out <- list(); for(i in 1:q){out[[i]] <- beta}; names(out) <- paste0("beta",0:(q - 1))

  beta_icqr <- beta_ZFD <- beta_YNH <- out
  est_se_icqr <- cover_icqr <- out
  time_icqr <- time_ZFD <- time_YNH <- NULL

  for(i in 1:B){
    s <- sim(n, ...)
    tL <- s$data$tL; tR <- s$data$tR
    X <- s$data[, grep("x", names(s$data))]

    time_ZFD[i] <- system.time(m_ZFD <- ZFDfit(s$formula, s$data, tau = tau))[3]
    time_YNH[i] <- system.time(m_YNH <- suppressWarnings(DArq((tL + tR)/2, X, delta = rep(3,n), tL, tR, taus = tau, iter = 100, tol = 0.01, bootstrap = FALSE)))[3]
    time_icqr[i] <- system.time(m_icqr <- ctqr(s$formula, data = s$data, p = tau))[3]; V <- vcov(m_icqr)

    for(j in 1:q){

      beta_ZFD[[j]][i,] <- m_ZFD$beta[j,]
      beta_YNH[[j]][i,] <- m_YNH$coef[j,]

      beta.icqr <- m_icqr$coef[j,]
      est.se.icqr <- sapply(V, function(x) sqrt(x[j,j]))
      low <- beta.icqr - 1.96*est.se.icqr
      up <- beta.icqr + 1.96*est.se.icqr

      beta_icqr[[j]][i,] <- beta.icqr
      est_se_icqr[[j]][i,] <- est.se.icqr
      cover_icqr[[j]][i,] <- (low <= true[[j]] & up >= true[[j]])
    }

    if(round(i/print.each) == i/print.each){print(i)}
  }
  
  outfun <- function(true,beta){
    est <- colMeans(beta)
    se <- apply(beta,2,sd)
    mse <- colMeans((beta - t(matrix(true, length(true), nrow(beta))))^2)
    A <- cbind(true = true, est = est, mse = mse, se = se)
    rownames(A) <- colnames(beta)
    A
  }
  out_icqr <- out_ZFD <- out_YNH <- list()
  for(j in 1:q){
   out_icqr[[j]] <- outfun(s$beta[[j]](tau), beta_icqr[[j]])
   out_icqr[[j]] <- cbind(out_icqr[[j]], est.se = colMeans(est_se_icqr[[j]]), cover = colMeans(cover_icqr[[j]]))
   out_ZFD[[j]] <- outfun(s$beta[[j]](tau), beta_ZFD[[j]])
   out_YNH[[j]] <- outfun(s$beta[[j]](tau), beta_YNH[[j]])
  }
  out <- list(beta_icqr = beta_icqr, beta_ZFD = beta_ZFD, beta_YNH = beta_YNH,
    est_se_icqr = est_se_icqr, cover_icqr = cover_icqr, 
    out_icqr = out_icqr, out_ZFD = out_ZFD, out_YNH = out_YNH, 
    tau = tau, true = s$beta,
    time = data.frame(icqr = time_icqr, ZFD = time_ZFD, YNH = time_YNH))
  class(out) <- "sim"
  out
}

print.sim <- function(x){

  q <- length(x$true)
  for(j in 1:q){
    cat("\n")
    print(paste0("beta", j - 1, ": icqr"))
    cat("\n")
    print(round(x$out_icqr[[j]],2))
    cat("\n")

    print(paste0("beta", j - 1, ": ZFD"))
    cat("\n")
    print(round(x$out_ZFD[[j]],2))
    cat("\n")

    print(paste0("beta", j - 1, ": YNH"))
    cat("\n")
    print(round(x$out_YNH[[j]],2))
    cat("\n")

    print("#######################################################")
    cat("\n")
  }
}

# Use var = 0 for the intercept, and var = j for x_j.
# Use what = "beta" for the betas, and what = "se" for the estimated standard errors of "icqr"

plot.sim <- function(obj, which = c("icqr", "ZFD", "YNH"), what = c("beta", "se"), var = 0){

  tau <- obj$tau
  which <- which[1]; what <- what[1]
  if(which == "ZFD" & what == "se"){stop("no estimated standard errors are provided by ZFDfit")}

  beta <- (if(which == "icqr") obj$beta_icqr[[var + 1]] else obj$beta_ZFD[[var + 1]])
  se <- obj$est_se_icqr[[var + 1]]

  if(what == "beta"){target <- beta; name <- "beta"}
  else{target <- se; name <- "est.se"}


  title <- (if(var == 0) "Intercept" else paste0("x", var))
  for(j in 1:ncol(target)){

   hist(target[,j], main = title, xlab = paste0(name, var, "(",tau[j],") --- ", which), br = 100)

   if(what == "beta"){
     abline(v = obj$true[[var + 1]](tau[j]), col = "red", lwd = 2)
   }
   else{
     abline(v = obj$out_icqr[[var + 1]][j,"se"], col = "red", lwd = 2)
   }
  }
}








